Load Packages

library(ggplot2)
library(readr)
library(ggmap)
library(maps)
library(stringr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
## 
##     inset
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(rgeos)
## rgeos version: 0.3-23, (SVN revision 546)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Linking to sp version: 1.2-4 
##  Polygon checking: TRUE
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(RColorBrewer)

PRISM uses image-based file storage, which means we need to work with two file types:

bil files (band interleaved by lines)

hdr files (associated header file)

Minimum Temperatures

filenames<-list.files("./data/PRISM/min_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/min_temp/PRISM",filenames)

require(maptools)
require(raster)

mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")

#Minimum Temperature
r<-raster(filenames[1])
z<-raster::extract(r,US_Counties_Polygons)
min_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
names(min_temp)<-c("year","fips","av")

for (i in 2:16){
  r<-raster(filenames[i])
  this.year<-i+1999
  z<-raster::extract(r,US_Counties_Polygons)
  tmp<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
  names(tmp)<-c("year","fips","av")
  min_temp<-rbind(min_temp,tmp)
}

Maximum Temperatures

filenames<-list.files("./data/PRISM/max_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/max_temp/PRISM",filenames)

require(maptools)
require(raster)

mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")

#Maximum Temperature
s<-raster(filenames[1])
y<-raster::extract(s,US_Counties_Polygons)
max_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
names(max_temp)<-c("year","fips","av")

for (i in 2:16){
  s<-raster(filenames[i])
  this.year<-i+1999
  y<-raster::extract(s,US_Counties_Polygons)
  tmp1<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
  names(tmp1)<-c("year","fips","av")
  max_temp<-rbind(max_temp,tmp1)
}

Convert dataframes to .csv

write.csv(min_temp,file="MinTemp")
write.csv(max_temp,file="MaxTemp")

Read in additional data

#CDC data
library(readr)
ld<-read_csv("./data/CDC/ld-case-counts-by-county-00-15.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   STNAME = col_character(),
##   CTYNAME = col_character()
## )
## See spec(...) for full column specifications.
#Census
pop<-read_csv("./data/CENSUS/county_population.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   fips = col_character(),
##   areaname = col_character(),
##   state_name = col_character(),
##   county_name = col_character(),
##   fipsst = col_character(),
##   fipsco = col_character()
## )
## See spec(...) for full column specifications.
#Load all PRISM data
load("get_PRISM.Rda")
mintemp<-read.csv("MinTemp")
maxtemp<-read.csv("MaxTemp")

#Load saved dataframes
load("get_DATA.Rda")

#Remove X column
mintemp$X<-NULL
maxtemp$X<-NULL

Pop TidyVerse Conversion

library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(magrittr)
library(dplyr)

pop %<>% select(fips,state_name,starts_with("pop2"))
pop %<>% gather(starts_with("pop2"),key="str_year",value="size") %>% na.omit
pop %<>% mutate(year=gsub("pop","",str_year))
pop %<>% mutate(year=as.numeric(year))
pop %<>% mutate(fips=gsub("^0","",fips))
pop %<>% mutate(fips=as.integer(fips))
pop$str_year<-NULL
names(pop)[names(pop) == 'size'] <- 'pop'

LD TidyVerse Conversion

ld %<>% gather(starts_with("Cases"),key="str_year",value="cases") 
ld %<>% mutate(year=gsub("Cases","",str_year)) 
ld %<>% mutate(year=as.numeric(year))
ld %<>% rename(state=STNAME,county=CTYNAME)

fips.builder<-function(st,ct){
  if (nchar(ct)==3){
    fips<-paste(as.character(st),as.character(ct),sep="") %>% as.integer
  }
  else if (nchar(ct)==2){
    fips<-paste(as.character(st),"0",as.character(ct),sep="") %>% as.integer
  }
  else {
    fips<-paste(as.character(st),"00",as.character(ct),sep="") %>% as.integer
  }
  return(fips)
}

ld %<>% rowwise() %>% mutate(fips=fips.builder(STCODE,CTYCODE)) 

ld %<>% select(-c(STCODE,CTYCODE,str_year))

Merge all data frames

#Join prism prcp and avtmp with ld 
ld.total<-inner_join(ld,prism)
#Join with mintemp and rename av column to mintmp
ld.total<-inner_join(ld.total,mintemp)
names(ld.total)[names(ld.total) == 'av'] <- 'mintmp'
#Join with maxtemp and rename av column to maxtmp
ld.total<-inner_join(ld.total,maxtemp)
names(ld.total)[names(ld.total) == 'av'] <- 'maxtmp'
#Join with pop
ld.pop<-inner_join(ld.total,pop)
## Joining, by = c("year", "fips")
ld.pop$state_name<-NULL


## CENSES data only goes until 2014, so obs go from 49744 to 46630

countyTick

#Compare countyTick and ld.total county names to ensure congruence for final merge: countyTick contains 3080 obs, so missing 29 counties, rename county and state to County and State

names(ld.total)[names(ld.total) == 'county'] <- 'County'
names(ld.total)[names(ld.total) == 'state'] <- 'State'

countyTick<-read_csv("./data/TICK/countyTick.csv")
#countyTick data frame lists only county name, ld.total lists "County" following county name
ld.total$County<-gsub(" County","",ld.total$County)
ld.total$County<-gsub(" Parish","",ld.total$County)

#Need to remove some unecessary columns, only need first 3 and tick_presence
countyTick$State_FIPS<-NULL
countyTick$County_FIPS<-NULL
countyTick$fipsname<-NULL
countyTick$polyname<-NULL

merging issues

ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")


# repeats in countyTick - solution:
FIPScounter<-table(countyTick$fips)
FIPScounter<-FIPScounter[FIPScounter>1]
problemFIPS<-as.integer(names(FIPScounter))
problemFIPS.locs<-NULL
for (k in problemFIPS){
  locs<-which(countyTick$fips==k)
  locs<-locs[-1]
  problemFIPS.locs<-c(problemFIPS.locs,locs)
}
countyTick<-countyTick[-problemFIPS.locs,]

## VA cities - TO BE REVISITED: JL "CITY PROBABLY HAS HIGH CASE NUMBERS SO DON"T THROW DATA AWAY IF WE DON"T HAVE TO"
tmp1<-subset(ld.total,year==2008 & State=="Virginia")
tmp2<-subset(countyTick,State=="Virginia")
setdiff(tmp1$County,tmp2$County)
addCity<-setdiff(tmp2$County,tmp1$County)

for (i in 1:dim(countyTick)[1]){
  if (countyTick$State[i]=="Virginia"){
    if (countyTick$County[i] %in% addCity){
      countyTick$County[i]<-paste(countyTick$County[i],"city",sep=" ")
    }
  }
}


#Fixing Inconsistencies
countyTick %<>% mutate(County=str_replace_all(County,"Mountrial","Mountrail"))
countyTick %<>% mutate(County=str_replace_all(County,"Miami Dade","Miami-Dade"))
countyTick %<>% mutate(County=str_replace_all(County,"De Kalb","DeKalb"))
ld.total %<>% mutate(County=str_replace_all(County,"District of Columbia","Washington"))
countyTick %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
ld.total %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
countyTick %<>% mutate(County=str_replace_all(County,"Du Page","DuPage"))
countyTick %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
ld.total %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
countyTick %<>% mutate(County=str_replace_all(County,"Lagrange","LaGrange"))
countyTick %<>% mutate(County=str_replace_all(County,"La Porte","LaPorte"))
countyTick %<>% mutate(County=str_replace_all(County,"LaFourche","Lafourche"))
countyTick %<>% mutate(County=str_replace_all(County,"Prince Georges","Prince George's"))
countyTick %<>% mutate(County=str_replace_all(County,"Queen Annes","Queen Anne's"))
countyTick %<>% mutate(County=str_replace_all(County,"St. Marys","St. Mary's"))
countyTick %<>% mutate(County=str_replace_all(County,"Lac Qui Parle","Lac qui Parle"))
ld.total %<>% mutate(County=str_replace_all(County,"Do\U3e34613ca Ana","Dona Ana"))
countyTick %<>% mutate(County=str_replace_all(County,"La Moure","LaMoure"))
countyTick %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
ld.total %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
countyTick %<>% mutate(County=str_replace_all(County,"Fond Du Lac","Fond du Lac"))

# Adding missing counties to countyTick

countyTick.add<-rbind(countyTick,c(55078,"Menominee","Wisconsin",2))
countyTick.add<-rbind(countyTick.add,c(4012,"La Paz","Arizona",1))
countyTick.add<-rbind(countyTick.add,c(8014,"Broomfield","Colorado",1))
countyTick<-countyTick.add

countyTick$SC<-paste(countyTick$County,countyTick$State,sep=",")
ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")

# Adding missing cities to countyTick
newCity<-data.frame(fips=c(24510,29510,51510,51515,51520,51530,51540,51550,51570,51580,51590,51595,51600,51610,51620,51630,51640,51660,51670,51678,51680,51683,51685,51690,51720,51730,51735,51740,51750,51760,51770,51775,51790,51820,51830,51840),County=c("Baltimore city","St. Louis city","Alexandria city","Bedford city","Bristol city","Buena Vista city","Charlottesville city","Chesapeake city","Colonial Heights city","Covington city","Danville city","Emporia city","Fairfax city","Falls Church city","Franklin city","Fredericksburg city","Galax city","Harrisonburg city","Hopewell city","Lexington city","Lynchburg city","Manassas city","Manassas Park city","Martinsville city","Norton city","Petersburg city","Poquoson city","Portsmouth city","Radford city","Richmond city","Roanoke city","Salem city","Staunton city","Waynesboro city","Williamsburg city","Winchester city"),State=c("Maryland","Missouri","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia"),Tick_presence=c(2,1,2,1,1,2,2,2,2,1,2,1,1,1,2,2,2,2,2,2,2,2,2,1,1,1,2,1,2,2,2,2,2,2,1,2))


newCity$SC<-paste(newCity$County,newCity$State,sep=",")
countyTick<-rbind(newCity,countyTick)

setdiff(ld.total$County,countyTick$County)

# No differences in dataframes

Merge ld.total and countyTick

countyTick$fips<-as.numeric(countyTick$fips)
countyTick$County<-as.character(countyTick$County)
countyTick$State<-as.character(countyTick$State)
countyTick$Tick_presence<-as.numeric(countyTick$Tick_presence)

ld.tick<-inner_join(ld.total,countyTick)

Convert Tick_presence to 0 (1 or 3) or 1 (2 or 4)

ld.tick$Presence_current<-ld.tick$Tick_presence
ld.tick$Presence_current<-ifelse(ld.tick$Presence_current%in%c(1,3),0,1)

Do cases occur only where ticks are present?

ld.tick %>% filter(cases>0) %>% ggplot(aes(x=as.factor(Presence_current),y=cases))+geom_boxplot()+scale_y_log10()

County_map & ld.tick discrepencies

county_map<-map_data("county")
county_map %<>% mutate(region=str_to_title(region))
county_map %<>% mutate(subregion=str_to_title(subregion))

county_map %<>% mutate(subregion=str_replace_all(subregion,"De Kalb","DeKalb"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Clair","St. Clair"))  
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francis","St. Francis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Soto","DeSoto"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Johns","St. Johns"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lucie","St. Lucie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcduffie","McDuffie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcintosh","McIntosh"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Salle","LaSalle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonough","McDonough"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mchenry","McHenry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclean","McLean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lagrange","LaGrange"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Porte","LaPorte"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Joseph","St. Joseph"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Obrien","O'Brien"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcpherson","McPherson"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccracken","McCracken"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccreary","McCreary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Bernard","St. Bernard"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Charles","St. Charles"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Helena","St. Helena"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St James","St. James"))

county_map %<>% mutate(subregion=str_replace_all(subregion,"St John The Baptist","St. John the Baptist"))

county_map %<>% mutate(subregion=str_replace_all(subregion,"St Landry","St. Landry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Martin","St. Martin"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Mary","St. Mary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Tammany","St. Tammany"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Prince Georges","Prince George's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Queen Annes","Queen Anne's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Marys","St. Mary's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Baltimore City","Baltimore city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lac Qui Parle","Lac qui Parle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lake Of The Woods","Lake of the Woods"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcleod","McLeod"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Louis","St. Louis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonald","McDonald"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francois","St. Francois"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Louis City","St. Louis city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Ste Genevieve","Ste. Genevieve"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lewis And Clark","Lewis and Clark"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccone","McCone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Yellowstone National","Yellowstone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckinley","McKinley"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lawrence","St. Lawrence"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdowell","McDowell"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Moure","LaMoure"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckenzie","McKenzie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcclain","McClain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccurtain","McCurtain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckean","McKean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccormick","McCormick"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccook","McCook"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcminn","McMinn"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcnairy","McNairy"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcculloch","McCulloch"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclennan","McLennan"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcmullen","McMullen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Isle Of Wight","Isle of Wight"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"King And Queen","King and Queen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Newport News","Newport News city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Virginia Beach","Virginia Beach city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Fond Du Lac","Fond du Lac"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Croix","St. Croix"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Witt","DeWitt"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Du Page","DuPage"))

#Changing Virginia cities to associated counties

ld.tick.county<-ld.tick

ld.tick.county%<>% mutate(County=str_replace_all(County,"Alexandria city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bedford city","Bedford"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bristol city","Washington"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Buena Vista city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Charlottesville city","Albemarle"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Chesapeake city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Colonial Heights city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Covington city","Alleghany"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Danville city","Pittsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Emporia city","Greensville"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fairfax city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Falls Church city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Franklin city","Accomack"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fredericksburg city","Spotsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Galax city","Carroll"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hampton city","Hampton"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Harrisonburg city","Rockingham"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hopewell city","Prince George"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lexington city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lynchburg city","Campbell"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas Park city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Martinsville city","Henry"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norfolk city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norton city","Wise"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Petersburg city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Poquoson city","York"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Portsmouth city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Radford city","Montgomery"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Richmond city","Henrico"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Roanoke city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Salem city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Staunton city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Suffolk city","Suffolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Waynesboro city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Williamsburg city","James City"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Winchester city","Frederick"))
                                                
ld.tick.county$SC<-paste(ld.tick.county$County,ld.tick.county$State,sep=",")
county_map %<>% mutate(SC=paste(subregion,region,sep=","))

setdiff(county_map$subregion,ld.tick.county$County)
setdiff(ld.tick.county$County,county_map$subregion)

Where do 0 tick presence Lymes Disease cases occur geographically?

ld.av.edit<-aggregate(ld.tick.county$cases,by=list(ld.tick.county$SC,ld.tick.county$Presence_current),FUN=mean)
names(ld.av.edit)<-c("SC","Presence_current","av.cases")
tmp1<-inner_join(county_map,ld.av.edit)
## Joining, by = "SC"
case.0<-subset(tmp1,Presence_current==0)

case.2<-subset(case.0,av.cases>2)
case.1<-subset(case.0,av.cases>1)
case.1<-unique(case.1)

case.2<-unique(case.2)
#[1] "Maricopa,Arizona"          "Pima,Arizona"             
 #[3] "Alameda,California"        "Contra Costa,California"  
 #[5] "Humboldt,California"       "Los Angeles,California"   
 #[7] "Marin,California"          "Mendocino,California"     
 #[9] "Nevada,California"         "Riverside,California"     
#[11] "San Diego,California"      "San Francisco,California" 
#[13] "San Mateo,California"      "Santa Barbara,California" 
#[15] "Santa Clara,California"    "Santa Cruz,California"    
#[17] "Sonoma,California"         "Marion,Indiana"           
#[19] "Starke,Indiana"            "Johnson,Kansas"           
#[21] "Allegan,Michigan"          "Kent,Michigan"            
#[23] "Ottawa,Michigan"           "Washtenaw,Michigan"       
#[25] "Clearwater,Minnesota"      "Lake,Minnesota"           
#[27] "Polk,Minnesota"            "Douglas,Nebraska"         
#[29] "Lancaster,Nebraska"        "Clark,Nevada"             
#[31] "Washoe,Nevada"             "Cass,North Dakota"        
#[33] "Grand Forks,North Dakota"  "Delaware,Ohio"            
#[35] "Franklin,Ohio"             "Hamilton,Ohio"            
#[37] "Montgomery,Ohio"           "Douglas,Oregon"           
#[39] "Jackson,Oregon"            "Josephine,Oregon"         
#[41] "Multnomah,Oregon"          "Allegheny,Pennsylvania"   
#[43] "Armstrong,Pennsylvania"    "Beaver,Pennsylvania"      
#[45] "Butler,Pennsylvania"       "Clarion,Pennsylvania"     
#[47] "Fayette,Pennsylvania"      "Indiana,Pennsylvania"     
#[49] "Lawrence,Pennsylvania"     "Mercer,Pennsylvania"      
#[51] "Somerset,Pennsylvania"     "Washington,Pennsylvania"  
#[53] "Westmoreland,Pennsylvania" "Brown,Texas"              
#[55] "Frederick,Virginia"        "James City,Virginia"      
#[57] "Pulaski,Virginia"          "Rockbridge,Virginia"      
#[59] "Shenandoah,Virginia"       "Warren,Virginia"          
#[61] "King,Washington"          
 
ggplot(tmp1,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(tmp1$Presence_current==0,"red","black"),fill=ifelse(tmp1$av.cases>2,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)#Counties outlined in red and filled with gold have 0 tick presence but av.cases>2

When do these cases occur?

tmp<-aggregate(ld.tick.county$prcp,by=list(ld.tick.county$SC,ld.tick.county$Presence_current,ld.tick.county$year,ld.tick.county$cases),FUN=mean)
names(tmp)<-c("SC","Presence_current","year","cases","av.prcp")

#2000: Washington Oregon, northeast, Florida and Texas area lacking cases 
tmp2000<-subset(tmp,year==2000)
map.2000<-inner_join(tmp2000,county_map)
## Joining, by = "SC"
ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$Presence_current==0,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2001: some case expansion near Oregon and Texas
tmp2001<-subset(tmp,year==2001)
map.2001<-inner_join(tmp2001,county_map)
## Joining, by = "SC"
ggplot(map.2001,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2001$Presence_current==0,"red","black"),fill=ifelse(map.2001$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2002: same as 2001 with some Northeast expansion
tmp2002<-subset(tmp,year==2002)
map.2002<-inner_join(tmp2002,county_map)
## Joining, by = "SC"
ggplot(map.2002,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2002$Presence_current==0,"red","black"),fill=ifelse(map.2002$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2003: Northeast expansion W-O regression
tmp2003<-subset(tmp,year==2003)
map.2003<-inner_join(tmp2003,county_map)
## Joining, by = "SC"
ggplot(map.2003,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2003$Presence_current==0,"red","black"),fill=ifelse(map.2003$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2004: California and north (Michigan) expansion
tmp2004<-subset(tmp,year==2004)
map.2004<-inner_join(tmp2004,county_map)
## Joining, by = "SC"
ggplot(map.2004,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2004$Presence_current==0,"red","black"),fill=ifelse(map.2004$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2005: same as 2004 except loss of one midwwest county and Texas expansion**
tmp2005<-subset(tmp,year==2005)
map.2005<-inner_join(tmp2005,county_map)
## Joining, by = "SC"
ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$Presence_current==0,"red","black"),fill=ifelse(map.2005$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2006: Texas, northern, and California regression** 
tmp2006<-subset(tmp,year==2006)
map.2006<-inner_join(tmp2006,county_map)
## Joining, by = "SC"
ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$Presence_current==0,"red","black"),fill=ifelse(map.2006$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2007: Still no Texas, Southwest expansion
tmp2007<-subset(tmp,year==2007)
map.2007<-inner_join(tmp2007,county_map)
## Joining, by = "SC"
ggplot(map.2007,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2007$Presence_current==0,"red","black"),fill=ifelse(map.2007$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2008: Texas expansion, a lot of northwest expansion**
tmp2008<-subset(tmp,year==2008)
map.2008<-inner_join(tmp2008,county_map)
## Joining, by = "SC"
ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$Presence_current==0,"red","black"),fill=ifelse(map.2008$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2009: Southern California expasion, Texas maintanence**
tmp2009<-subset(tmp,year==2009)
map.2009<-inner_join(tmp2009,county_map)
## Joining, by = "SC"
ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$Presence_current==0,"red","black"),fill=ifelse(map.2009$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2010: Southern Cali regression
tmp2010<-subset(tmp,year==2010)
map.2010<-inner_join(tmp2010,county_map)
## Joining, by = "SC"
ggplot(map.2010,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2010$Presence_current==0,"red","black"),fill=ifelse(map.2010$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2011: Texas regression, Southern Cali and W-O expansion
tmp2011<-subset(tmp,year==2011)
map.2011<-inner_join(tmp2011,county_map)
## Joining, by = "SC"
ggplot(map.2011,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2011$Presence_current==0,"red","black"),fill=ifelse(map.2011$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2012: Northwest and Southern Cali regression
tmp2012<-subset(tmp,year==2012)
map.2012<-inner_join(tmp2012,county_map)
## Joining, by = "SC"
ggplot(map.2012,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2012$Presence_current==0,"red","black"),fill=ifelse(map.2012$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2013: A lot of mid west expansion**
tmp2013<-subset(tmp,year==2013)
map.2013<-inner_join(tmp2013,county_map)
## Joining, by = "SC"
ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$Presence_current==0,"red","black"),fill=ifelse(map.2013$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2014: midwest regression
tmp2014<-subset(tmp,year==2014)
map.2014<-inner_join(tmp2014,county_map)
## Joining, by = "SC"
ggplot(map.2014,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2014$Presence_current==0,"red","black"),fill=ifelse(map.2014$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2015: same as 2014
tmp2015<-subset(tmp,year==2015)
map.2015<-inner_join(tmp2015,county_map)
## Joining, by = "SC"
ggplot(map.2015,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2015$Presence_current==0,"red","black"),fill=ifelse(map.2015$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

Labels for counties with case>1 and case>2 and tick presence=0

cnames.1 <- aggregate(cbind(long, lat) ~ SC, data=case.1, FUN=function(x) mean(range(x)))

cnames.2 <- aggregate(cbind(long, lat) ~ SC, data=case.2, FUN=function(x) mean(range(x)))

ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$SC %in% cnames.1$SC,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)#If outlined in red than av.case>1 and tick presence=0

Avg Prcp for important years

Important year transitions: 2005(cases)-2006(loss of cases) 2008-2009: big case years all around *2013: Midwest cases

#2005-2006 case comparison: 1014.887 and 1035.723 are mean of av.prcp for given year; prcp goes below avg in S.Cali & Midwest but above average in Michigan; precipitation influence only in northeast?

ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2005$av.prcp>1014.887,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2006$av.prcp>1035.723,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2008-2009: Rainy northeast midwest, dry California

ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2008$av.prcp>1075.407,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2009$av.prcp>1178.298,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2013: Dry year northwest, rainy for some of midwest (less than 2008-2009)

ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2013$av.prcp>1132.778,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

Socioeconomic and deer Data

#County Classifications
class<-read.csv("~/lyme_modeling/data/CENSUS/County Classifications.csv")
#Income
income<-read.csv("~/lyme_modeling/data/CENSUS/Income.csv")
#Jobs
jobs<-read.csv("~/lyme_modeling/data/CENSUS/Jobs.csv")
#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")

Jobs Tidyverse conversion: Unemployment Rate 2007-2015

jobs %<>% gather(starts_with("UnempRate"),key="str_year",value="UnemploymentRate") 
jobs %<>% mutate(year=gsub("UnempRate","",str_year)) 
jobs %<>% mutate(year=as.numeric(year))
jobs %<>% select(c(FIPS,State,County,UnemploymentRate,year))

jobs%<>% mutate(State=str_replace_all(State,"AL","Alabama"))
jobs%<>% mutate(State=str_replace_all(State,"AR","Arkansas"))
jobs%<>% mutate(State=str_replace_all(State,"AZ","Arizona"))
jobs%<>% mutate(State=str_replace_all(State,"CA","California"))
jobs%<>% mutate(State=str_replace_all(State,"CO","Colorado"))
jobs%<>% mutate(State=str_replace_all(State,"CT","Connecticut"))
jobs%<>% mutate(State=str_replace_all(State,"DE","Delaware"))
jobs%<>% mutate(State=str_replace_all(State,"FL","Florida"))
jobs%<>% mutate(State=str_replace_all(State,"GA","Georgia"))
jobs%<>% mutate(State=str_replace_all(State,"ID","Idaho"))
jobs%<>% mutate(State=str_replace_all(State,"IL","Illinois"))
jobs%<>% mutate(State=str_replace_all(State,"IN","Indiana"))
jobs%<>% mutate(State=str_replace_all(State,"IA","Iowa"))
jobs%<>% mutate(State=str_replace_all(State,"KS","Kansas"))
jobs%<>% mutate(State=str_replace_all(State,"KY","Kentucky"))
jobs%<>% mutate(State=str_replace_all(State,"LA","Louisiana"))
jobs%<>% mutate(State=str_replace_all(State,"ME","Maine"))
jobs%<>% mutate(State=str_replace_all(State,"MD","Maryland"))
jobs%<>% mutate(State=str_replace_all(State,"MA","Massachusetts"))
jobs%<>% mutate(State=str_replace_all(State,"MI","Michigan"))
jobs%<>% mutate(State=str_replace_all(State,"MN","Minnesota"))
jobs%<>% mutate(State=str_replace_all(State,"MS","Mississippi"))
jobs%<>% mutate(State=str_replace_all(State,"MO","Missouri"))
jobs%<>% mutate(State=str_replace_all(State,"MT","Montana"))
jobs%<>% mutate(State=str_replace_all(State,"NE","Nebraska"))
jobs%<>% mutate(State=str_replace_all(State,"NV","Nevada"))
jobs%<>% mutate(State=str_replace_all(State,"NH","New Hampshire"))
jobs%<>% mutate(State=str_replace_all(State,"NJ","New Jersey"))
jobs%<>% mutate(State=str_replace_all(State,"NM","New Mexico"))
jobs%<>% mutate(State=str_replace_all(State,"NY","New York"))
jobs%<>% mutate(State=str_replace_all(State,"NC","North Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"ND","North Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"OH","Ohio"))
jobs%<>% mutate(State=str_replace_all(State,"OK","Oklahoma"))
jobs%<>% mutate(State=str_replace_all(State,"OR","Oregon"))
jobs%<>% mutate(State=str_replace_all(State,"PA","Pennsylvania"))
jobs%<>% mutate(State=str_replace_all(State,"RI","Rhode Island"))
jobs%<>% mutate(State=str_replace_all(State,"SC","South Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"SD","South Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"TN","Tennessee"))
jobs%<>% mutate(State=str_replace_all(State,"TX","Texas"))
jobs%<>% mutate(State=str_replace_all(State,"UT","Utah"))
jobs%<>% mutate(State=str_replace_all(State,"VT","Vermont"))
jobs%<>% mutate(State=str_replace_all(State,"VA","Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WA","Washington"))
jobs%<>% mutate(State=str_replace_all(State,"WV","West Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WI","Wisconsin"))
jobs%<>% mutate(State=str_replace_all(State,"WY","Wyoming"))

jobs$SC<-paste(jobs$County,jobs$State,sep=",")
jobs$FIPS<-as.numeric(jobs$FIPS)
names(jobs)[names(jobs) == 'FIPS'] <- 'fips'

jobs_map<-inner_join(ld.tick.county,jobs) 
## Joining, by = c("State", "County", "year", "fips", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
jobs_map<-jobs_map %>% filter (Presence_current==1) 

Deer conversion

#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")
deer$CntyName<-gsub(" County","",deer$CntyName)
names(deer)[names(deer)=='StateName']<-'State'
names(deer)[names(deer)=='CntyName']<-'County'
names(deer)[names(deer) == 'FIPS'] <- 'fips'
deer$SC<-paste(deer$County,deer$State,sep=",")

ld.pres.total<-inner_join(deer,ld.tick.county)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
ld.jobs.total<-inner_join(deer,jobs_map)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

State unemployment

#Building dataframe of average unemployment rate, average cases, total cases, and total population per state 2007-2015

u<- jobs_map %>% filter(Presence_current==1) %>% group_by(State,year) %>% summarize(av.UR=mean(UnemploymentRate),av.cases=mean(cases),tot.cases=sum(cases))
## Warning: Grouping rowwise data frame strips rowwise nature
#More than 250 cases
u250up <- u %>% filter(tot.cases>=250)

ggplot(u,aes(x=av.UR,y=tot.cases,col=as.factor(year)))+geom_point()

#Adding in population

z <- pop %>% group_by(state_name,year) %>% summarize(tot.pop=sum(pop))
z %<>% rename(State=state_name)

z <- inner_join(z,u)
## Joining, by = c("State", "year")
z %<>% mutate(incidence=(100000*(tot.cases/tot.pop)))

z_big <- z %>% filter(incidence>=5)

Unemployment plots and average case plots for same years; is there any sort of trend to help predict 2000-2006 Unemployment Rate?

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
#All of average cases per sets of 5 States

#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#All 50 states Unemployment Rate
ggplot(u,aes(x=year,y=av.UR,group=State,colour=State))+geom_line()

#Subsets of 5

#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u, State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

Univeriate predictor plots

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2000,stat=identity))

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")

#Deer
ggplot(ld.pres.total,aes(x=(DeerDensityMax),y=log10(cases),group=DeerDensityMax))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).

ggplot(ld.pres.total,aes(x=(DeerDensityMode),y=log10(cases),group=DeerDensityMode))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).

#Population: As population increase so do cases
ld.pop.edit<-inner_join(ld.pop,ld.pres.total)
## Joining, by = c("State", "County", "cases", "year", "fips", "prcp", "avtemp", "mintmp", "maxtmp", "SC")
ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2005,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2009,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2008,stat=identity))+geom_smooth(method="lm")

#Temperature

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2005,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2006,stat=identity))+geom_smooth(method="lm")

Incidence plots

#Unemployment Rate 
ggplot(z_big,aes(x=av.UR,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

x<-ld.pop.edit %>% group_by(State,year) %>% summarize(tot.cases=sum(cases),av.prcp=mean(prcp),av.temp=mean(avtemp),min.temp=mean(mintmp),max.temp=mean(maxtmp),tot.pop=sum(pop),av.deermax=mean(DeerDensityMax),av.deermode=mean(DeerDensityMode)) %>% mutate(incidence=(100000*(tot.cases/tot.pop)))
## Warning: Grouping rowwise data frame strips rowwise nature
x_big<- x %>% filter(incidence>=5)

#Precipitation: more precipitation, higher incidence; 2012-2013 have mid level rainfall with highest incidence--> result of other variable?
ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(data=subset(x_big,year==2012),aes(col=as.factor(State)))+geom_smooth(method=lm)

#Temperature: average max temp has most negative relationship; higher the maximum temperature the lower the incidence 
ggplot(x_big,aes(x=av.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

ggplot(x_big,aes(x=min.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

ggplot(x_big,aes(x=max.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

#Deer
ggplot(data=subset(x_big,year==2000:2007),aes(av.deermax,incidence))+geom_boxplot(aes(col=as.factor(year)))
## Warning in year == 2000:2007: longer object length is not a multiple of
## shorter object length
## Warning: position_dodge requires non-overlapping x intervals

## GGPairs data visualization: from lyme_modeling example

library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
#Big picture correlations

ggpairs(ld.jobs.total,columns = c("cases","prcp","avtemp","UnemploymentRate"))

#Combine all thinned out dataframes
all_av<-inner_join(x,z)
## Joining, by = c("State", "year", "tot.cases", "tot.pop", "incidence")
ggpairs(x,columns=c("tot.cases","av.prcp","av.temp","tot.pop"))

x %<>% mutate(log10totpop=log10(tot.pop))
x %<>% mutate(log10totcases=log10(tot.cases + 1))

ggpairs(x,columns=c("log10totpop","log10totcases","av.prcp","av.temp"))

all_av%<>% mutate(log10totpop=log10(tot.pop))
all_av%<>% mutate(log10totcases=log10(tot.cases + 1))
all_av %<>% mutate(log10in=log10(incidence+1))
ggpairs(all_av,columns=c("log10in","av.prcp","av.UR","av.temp"))

lm_UR_in<-lm(av.UR ~ log10in, data=all_av)
summary(lm_UR_in)
## 
## Call:
## lm(formula = av.UR ~ log10in, data = all_av)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.226 -1.116 -0.048  1.312  3.400 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.0279     0.2722  36.842  < 2e-16 ***
## log10in      -1.5061     0.2060  -7.311 1.35e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.545 on 85 degrees of freedom
## Multiple R-squared:  0.3861, Adjusted R-squared:  0.3788 
## F-statistic: 53.45 on 1 and 85 DF,  p-value: 1.351e-10
#The modelr package: nesting method of organization is useful for explanatory modeling
by_state<-ld.pop.edit %>% group_by(State)
## Warning: Grouping rowwise data frame strips rowwise nature
by_state %<>% nest

#Write a function that takes a data frame as its argument and returns a liner model object that predicts size by year
linGrowth_model<-function(df){
  lm(pop ~ year, data = df)
}

#State-wise statistical modeling exercise
models<-purrr::map(by_state$data,linGrowth_model)

#Add a column to the by_state df, where each row(State) has its own model object
by_state%<>%mutate(model=purrr::map(data,linGrowth_model))

library(modelr)
by_state%<>%mutate(resids=purrr::map2(data,model,add_residuals))

#Write a function that accepts an obj of the type in the resids list and returns a sum of the absolute values

sum_resids<-function(x){
  sum(abs(x$resid))
}
by_state%<>%mutate(totalResid=purrr::map(resids,sum_resids))

#Write a function that accepts a linear model and returns the slope
get_slope<-function(model){
  model$coefficients[2]
}
by_state%<>%mutate(slope=purrr::map(model,get_slope))

#Un-nest residual and slope column so not in list format
slopes<-unnest(by_state,slope)
totalResids<-unnest(by_state,totalResid)

#Plot growth rate for all states
slopes%>%ggplot(aes(State,slope))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))

#Plot total residuals for all states
totalResids%>%ggplot(aes(State,totalResid))+geom_point()+theme(axis.text.x=element_text(angle=90,hjust=1))

##Use different data frame name

by_state2 <- ld.pop.edit%>%group_by(State)%>%nest
## Warning: Grouping rowwise data frame strips rowwise nature
#Write a function that accepts an element of the by_state$data list-column and returns the spearman correlation coefficient between Lyme disease cases and precipitation

runCor<-function(df){
  suppressWarnings(cor.test(df$cases,df$prcp,method="spearman")$estimate)
}

by_state2%<>%mutate(spCor=purrr::map(data,runCor))

spCors<-unnest(by_state2,spCor)
spCors%<>%arrange(desc(spCor))
spCors$State<-factor(spCors$State,levels=unique(spCors$State))
ggplot(spCors,aes(State,spCor))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))